##Loading Libraries
library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ stringr 1.4.0
## ✓ tidyr 1.0.2 ✓ forcats 0.4.0
## ✓ readr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x tidyr::extract() masks magrittr::extract()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x purrr::set_names() masks magrittr::set_names()
library(ggplot2)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(edgeR)
## Warning: package 'edgeR' was built under R version 3.6.1
## Loading required package: limma
library(pheatmap)
library(ggplot2)
#Read in Data and Prepare Data Frames
demo<-read.csv("/Users/simransamra/R/git_temp/Repo_team_Genome-Surfers_W2020/data/raw_data/tcga_paad_clinical.csv", header = T)
load("/Users/simransamra/R/git_temp/Repo_team_Genome-Surfers_W2020/data/raw_data/tcga_paad.RData")
#Melt data
tcga$gene <- rownames(tcga)
dataMelt<-melt(tcga, id.vars = "gene", var = "Samples")
#Set up data
demo <- demo %>% select(c(submitter_id, age_at_index,year_of_birth, year_of_death, vital_status, race, gender, ajcc_pathologic_m, ajcc_pathologic_n,ajcc_pathologic_t, ajcc_pathologic_stage))
tcgaT <- as.data.frame(t(as.matrix(tcga)))
tcgaN <- tibble::rownames_to_column(tcgaT, "submitter_id")
demo$submitter_id <- as.factor(demo$submitter_id)
data <- right_join(x = tcgaN, y = demo, by = "submitter_id")
## Warning: Column `submitter_id` joining character vector and factor, coercing
## into character vector
#Density Plot - Age at Index vs Vital Status
ggplot(data, aes(x = age_at_index, colour=vital_status)) +
geom_density() +
labs(title = "Age at Index and Vital Status",
color = "Vital Status", x = "Age at Index", y = "Density") +
theme(
plot.title = element_text(color = "blue", size = 12, face = "bold")) +
scale_fill_manual(values = c("darkblue", "darkred"))
#Density Plot - Age at Index vs Pathologic Stage M
ggplot(data, aes(x = age_at_index, colour=ajcc_pathologic_m)) +
geom_density() +
labs(title = "AJCC Pathologic M",
subtitle = "AJCC TNM system: (M) Classifies cancers by the presence or absence of distant metastases", x = "Age at Index", y = "Density", color = "AJCC Pathologic M", caption = "M0:No evidence of distant metastasis; M1: Distant metastasis; MX: Unknown distant metastasis status") +
theme(
plot.title = element_text(color = "blue", size = 12, face = "bold"),
plot.subtitle = element_text(color = "black", size = 7),
plot.caption = element_text(color = "black", size = 6, hjust = 0)
)
#Density Plot - Age at Index vs Pathologic Stage N
ggplot(data, aes(x = age_at_index, colour=ajcc_pathologic_n)) +
geom_density() +
labs(title = "AJCC Pathologic N",
subtitle = "AJCC TNM system: (N) Describes involvement of regional lymph nodes", x = "Age at Index", y = "Density", color = "AJCC Pathologic N", caption = "--: Not reported; N0: No regional lymph node metastasis; N1: Regional lymph node metastasis; \n N1b: Metastasis in multiple regional lymph nodes; NX: Metastasis cannot be assessed") +
theme(
plot.title = element_text(color = "blue", size = 12, face = "bold"),
plot.subtitle = element_text(color = "black", size = 7),
plot.caption = element_text(color = "black", size = 6, hjust = 0)
)
#Density Plot - Age at Index vs Pathologic Stage T
ggplot(data, aes(x = age_at_index, colour=ajcc_pathologic_t)) +
geom_density() +
labs(title = "AJCC Pathologic T",
subtitle = "AJCC TNM system: (T) ", x = "Age at Index", y = "Density", color = "AJCC Pathologic T", caption = "--: Not reported; T1: Tumor limited to the pancreas (2 cm or less in greatest dimension); T2: Tumor limited to \n the pancreas (greater than 2 cm in greatest dimension); T3: Tumor extends beyond pancrease, but without \n the involvement of coeliac axis or superior mesenteric artery; T4: Tumor involves coeliac axis or superior \n mesenteric artery; TX: Tumor cannot be assessed )
") +
theme(
plot.title = element_text(color = "blue", size = 12, face = "bold"),
plot.subtitle = element_text(color = "black", size = 7),
plot.caption = element_text(color = "black", size = 6, hjust = 0)
)
Definition of the different pathologic stages:
#Density Plot - Age at Index vs Pathologic Stage
ggplot(data, aes(x = age_at_index, colour=ajcc_pathologic_stage)) +
geom_density() +
labs(title = "AJCC Pathologic Stage", x = "Age at Index", y = "Density", color = "AJCC Pathologic Stage") +
theme(
plot.title = element_text(color = "blue", size = 12, face = "bold"),
plot.subtitle = element_text(color = "black", size = 7),
plot.caption = element_text(color = "black", size = 6, hjust = 0)
)
#Boxplot - Distribution of Gene Expression
ggplot(dataMelt, aes(x=Samples, y=value)) +
geom_boxplot() +
xlab("Samples") +
ylab("Expression (Log_2_ Transformed)")+
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
ggtitle("Distribution of Gene Expression Across All 177 Samples")+
theme(plot.title = element_text(hjust = 0.5))
#Density Plot
ggplot(dataMelt, aes(value, color = Samples)) +
geom_density() +
xlab("Expression (Log2 Transformed)") +
ylab("Density")+
ggtitle("Distribution of Gene Expression Across All 177 Samples") +
theme(plot.title = element_text(hjust = 0.5)) + theme(legend.position="none")
#Prepare Data
load("/Users/simransamra/R/git_temp/Repo_team_Genome-Surfers_W2020/data/raw_data/tcga_paad.RData")
#tcga$gene <- rownames(tcga)
demoHeat<-demo
toDelete <- seq(1, nrow(demoHeat), 2)
demoHeat<-demoHeat[toDelete ,]
designFactors <- as.data.frame(demoHeat[, c("ajcc_pathologic_stage", "vital_status")])
rownames(designFactors) <- colnames(tcga)
data.matrix <- cor(tcga)
pheatmap(data.matrix, cluster_rows = T, scale = "none", clustering_method = "average",
clustering_distance_cols = "correlation", show_colnames = T, show_rownames = T,
main = "Clustering Heatmap: Pathologic Stage and Vital Status ", annotation = designFactors, treeheight_col = 35, treeheight_row = 35,
fontsize = 4)
#Prepare Data
load("/Users/simransamra/R/git_temp/Repo_team_Genome-Surfers_W2020/data/raw_data/tcga_paad.RData")
#tcga$gene <- rownames(tcga)
demoHeat<-demo
toDelete <- seq(1, nrow(demoHeat), 2)
demoHeat<-demoHeat[toDelete ,]
designFactors <- as.data.frame(demoHeat[, c("race", "gender")])
rownames(designFactors) <- colnames(tcga)
data.matrix <- cor(tcga)
pheatmap(data.matrix, cluster_rows = T, scale = "none", clustering_method = "average",
clustering_distance_cols = "correlation", show_colnames = T, show_rownames = T,
main = "Clustering Heatmap: Gender and Race", annotation = designFactors, treeheight_col = 35, treeheight_row = 35,
fontsize = 4)
#Prepare Data
load("/Users/simransamra/R/git_temp/Repo_team_Genome-Surfers_W2020/data/raw_data/tcga_paad.RData")
#tcga$gene <- rownames(tcga)
demoHeat<-demo
toDelete <- seq(1, nrow(demoHeat), 2)
demoHeat<-demoHeat[toDelete ,]
designFactors <- as.data.frame(demoHeat[, c("ajcc_pathologic_n", "ajcc_pathologic_t", "ajcc_pathologic_m")])
rownames(designFactors) <- colnames(tcga)
data.matrix <- cor(tcga)
pheatmap(data.matrix, cluster_rows = T, scale = "none", clustering_method = "average",
clustering_distance_cols = "correlation", show_colnames = T, show_rownames = T,
main = "Clustering Heatmap: AJCC TNM System", annotation = designFactors, treeheight_col = 35, treeheight_row = 35,
fontsize = 4)